In this brief example, I show how to use the GPS coordinates from the Demographic and Health Survey data and merge them to the ADM2 subnational geographic level for the country of Ethopia. Then I produce estimates using the DHS data for ADM 2 regions of the country.

This is possible by useing the GIS capacity of the sf package to spatially intersect the DHS points and the ADM 2 polygons.

library(sf)
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(mapview)

Read in dhs points

ethpoly <- st_read(dsn = "~/OneDrive - University of Texas at San Antonio//students/fikre/spatial_epi/ETH_adm2.shp")
## Reading layer `ETH_adm2' from data source `C:\Users\ozd504\OneDrive - University of Texas at San Antonio\students\fikre\spatial_epi\ETH_adm2.shp' using driver `ESRI Shapefile'
## Simple feature collection with 72 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 32.99994 ymin: 3.322099 xmax: 47.98618 ymax: 14.89996
## Geodetic CRS:  WGS 84
ethpoly$struct <- 1:dim(ethpoly)[1]

plot(ethpoly["struct"])

Read in dhs sample locations and ADM 2 regions.

The adm2 shapefile can be found in the Diva GIS international data repository, or from the IPUMS International site below I use the ADM2 level of administrative geography.

These locations are not identified in the DHS, but by performing a spatial intersection, we can merge the DHS survey locations to the ADM 2 units

eth_dots<-st_read("~/OneDrive - University of Texas at San Antonio//students//fikre/ethiopia_dhs/ETGE52FL/ETGE52FL.shp")
## Reading layer `ETGE52FL' from data source `C:\Users\ozd504\OneDrive - University of Texas at San Antonio\students\fikre\ethiopia_dhs\ETGE52FL\ETGE52FL.shp' using driver `ESRI Shapefile'
## Simple feature collection with 535 features and 20 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 0 ymin: 0 xmax: 43.36409 ymax: 14.50379
## Geodetic CRS:  WGS 84
eth_dots <- eth_dots[eth_dots$LATNUM>0,]
eth_adm2<-st_read("~/OneDrive - University of Texas at San Antonio//students//fikre/spatial_epi/ETH_adm2.shp")
## Reading layer `ETH_adm2' from data source `C:\Users\ozd504\OneDrive - University of Texas at San Antonio\students\fikre\spatial_epi\ETH_adm2.shp' using driver `ESRI Shapefile'
## Simple feature collection with 72 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 32.99994 ymin: 3.322099 xmax: 47.98618 ymax: 14.89996
## Geodetic CRS:  WGS 84
#merge dots to administrative data
eth_dots2000<-st_intersection(eth_dots, eth_adm2)
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
mapview(eth_dots["DHSCLUST"])+mapview(eth_adm2["NAME_2"])

Read in DHS survey and recode stunting outcome

Next, I use the DHS survey data to estimate the prevalence of stunting in the ADM 2 regions.

library(haven)
dhs2000<-read_dta("~/OneDrive - University of Texas at San Antonio//students//fikre/ethiopia_dhs/ETKR41DT/ETKR41FL.DTA")
dhs2000<-zap_labels(dhs2000)

library(car)
## Loading required package: carData
dhs2000$stunting<-ifelse(dhs2000$hw5/100<=-2&dhs2000$hw5/100!=-2,1,0)
#dhs2000$sex<-dhs2000$hc27

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
dhs2000<-dhs2000%>%
  mutate(wt = v005/1000000)%>%
  filter(complete.cases(stunting))%>%
  select(v001,stunting, wt, v021, v022)

Merge survey data to sample locations

dhs2000m<-merge(dhs2000, eth_dots2000, by.x="v001", by.y="DHSCLUST")

Create survey estimates for new regions after spatial intersection

library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
options(survey.lonely.psu = "adjust")
des<-svydesign(ids = ~v021, strata = ~v022, weights = ~wt, data=dhs2000m)
names(dhs2000m)
##  [1] "v001"       "stunting"   "wt"         "v021"       "v022"      
##  [6] "DHSID"      "DHSCC"      "DHSYEAR"    "CCFIPS"     "ADM1FIPS"  
## [11] "ADM1FIPSNA" "ADM1SALBNA" "ADM1SALBCO" "ADM1DHS"    "ADM1NAME"  
## [16] "DHSREGCO"   "DHSREGNA"   "SOURCE"     "URBAN_RURA" "LATNUM"    
## [21] "LONGNUM"    "ALT_GPS"    "ALT_DEM"    "DATUM"      "ID_0"      
## [26] "ISO"        "NAME_0"     "ID_1"       "NAME_1"     "ID_2"      
## [31] "NAME_2"     "TYPE_2"     "ENGTYPE_2"  "NL_NAME_2"  "VARNAME_2" 
## [36] "geometry"
est.stunt <- svyby(~stunting, ~ID_2, des, FUN=svymean, na.rm=T)

head(est.stunt)
##   ID_2  stunting         se
## 1    1 0.4236812 0.02842176
## 3    3 0.4964883 0.07773643
## 4    4 0.5227321 0.10306007
## 5    5 0.5469683 0.04175304
## 6    6 0.3879567 0.10738629
## 7    7 0.6280925 0.02028507

merge estimates to map and map stunting prevalence

library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(mapview)
mdat<- geo_join(ethpoly, est.stunt, "ID_2","ID_2")
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
mapview(mdat["stunting"])